library(here)
## here() starts at /Users/zohrazahir/Documents/GitHub/Species_distributions_and_traits
source(here("functions/R_package_check.R"))
## Loading required package: tidyverse
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'tidyverse'
## also installing the dependencies 'purrr', 'gargle', 'ids', 'selectr', 'dbplyr', 'dtplyr', 'googledrive', 'googlesheets4', 'haven', 'modelr', 'reprex', 'rvest', 'covr', 'feather'
##
## The downloaded binary packages are in
## /var/folders/x0/cz4tb35n5fbb5y4zh6srtrt00000gn/T//RtmpY52V84/downloaded_packages
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## Loading required package: vegan
##
## Loading required package: permute
##
## Loading required package: lattice
##
## This is vegan 2.6-4
##
## Loading required package: lubridate
##
##
## Attaching package: 'lubridate'
##
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
##
##
## Loading required package: lmodel2
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'lmodel2'
##
## The downloaded binary packages are in
## /var/folders/x0/cz4tb35n5fbb5y4zh6srtrt00000gn/T//RtmpY52V84/downloaded_packages
## Loading required package: openxlsx
## Loading required package: knitr
## Loading required package: PBSmapping
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'PBSmapping'
##
## The downloaded binary packages are in
## /var/folders/x0/cz4tb35n5fbb5y4zh6srtrt00000gn/T//RtmpY52V84/downloaded_packages
##
## -----------------------------------------------------------
## PBS Mapping 2.73.2 -- Copyright (C) 2003-2023 Fisheries and Oceans Canada
##
## PBS Mapping comes with ABSOLUTELY NO WARRANTY;
## for details see the file COPYING.
## This is free software, and you are welcome to redistribute
## it under certain conditions, as outlined in the above file.
##
## A complete user guide 'PBSmapping-UG.pdf' is located at
## /Library/Frameworks/R.framework/Versions/4.2/Resources/library/PBSmapping/doc/PBSmapping-UG.pdf
##
## Packaged on 2022-09-06
## Pacific Biological Station, Nanaimo
##
## All available PBS packages can be found at
## https://github.com/pbs-software
##
## To see demos, type '.PBSfigs()'.
## -----------------------------------------------------------
##
##
## Loading required package: cowplot
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:lubridate':
##
## stamp
##
## Loading required package: viridis
## Loading required package: viridisLite
## Loading required package: ggbreak
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'ggbreak'
## also installing the dependencies 'magick', 'ggimage', 'prettydoc'
##
## The downloaded binary packages are in
## /var/folders/x0/cz4tb35n5fbb5y4zh6srtrt00000gn/T//RtmpY52V84/downloaded_packages
## ggbreak v0.1.1
##
## If you use ggbreak in published research, please cite the following
## paper:
##
## S Xu, M Chen, T Feng, L Zhan, L Zhou, G Yu. Use ggbreak to effectively
## utilize plotting space to deal with large datasets and outliers.
## Frontiers in Genetics. 2021, 12:774846. doi: 10.3389/fgene.2021.774846
## Loading required package: plotly
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
##
## Loading required package: Polychrome
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'Polychrome'
## also installing the dependency 'scatterplot3d'
##
## The downloaded binary packages are in
## /var/folders/x0/cz4tb35n5fbb5y4zh6srtrt00000gn/T//RtmpY52V84/downloaded_packages
prepareLibrary()
source(here("functions/QOL_toolkit.R"))
theme_set(theme_bw())
set_here(path = "..")
## File .here already exists in /Users/zohrazahir/Documents/GitHub/Species_distributions_and_traits
# Load trait dataset metadata tables
directory.trait <- openxlsx::read.xlsx(here("tables/trait_directory_20230207.xlsx"))
directory.lifestage <- openxlsx::read.xlsx(here("tables/lifestage_directory_20230207.xlsx"))
taxonomy <- openxlsx::read.xlsx(here("tables/taxonomy_table_20230207.xlsx"))
dataset.format <- openxlsx::read.xlsx(here("tables/trait_dataset_standard_format_20230118.xlsx"))[-1,]
# Load the trait dataset
load(here("data/zooplankton_traits-2023-02-12.RData"))
The trait dataset is provided in two levels. Level 1 (traits.lvl1) contains individual-level records which can have multiple measurements of a trait for a species. Level 2 (traits.lvl2) contains species-level averages for numerical traits. Only taxa at the species or subspecies ranks are provided in the level 2 data. For the exploration below, we will look into the level 2 data.
# Assign the trait data level to analyze
zooplankton.traits <- traits.lvl2
# Structure of the dataset
# The dataset starts with identifiers (cols 1-5), followed by the core trait information of the scientific name, trait name, trait value, trait unit, and data type. Many traits have ancillary information (cols 11-26) such as life stage, temperature, and notes. This are followed by the references (cols 27-30) and the taxonomic information (31-41) associated with the trait record. All taxa have a taxonID and most have an aphiaID to link it with the WoRMS database.Details regarding the data provenance of the original trait record are retained in columns 42-54.
colnames(zooplankton.traits)
## [1] "traitTaxonID" "traitID"
## [3] "taxonID" "stageID"
## [5] "taxonRankID" "scientificName"
## [7] "traitName" "traitValue"
## [9] "traitUnit" "dataType"
## [11] "lifeStage" "traitValueTemperature"
## [13] "traitValueSD" "traitValueN"
## [15] "sizeType" "sizeAssocName"
## [17] "sizeAssocUnit" "sizeAssocValue"
## [19] "sizeAssocSD" "sizeAssocN"
## [21] "sizeAssocReference" "location"
## [23] "longitude" "latitude"
## [25] "notes" "observationNumber"
## [27] "primaryReference" "secondaryReference"
## [29] "primaryReferenceDOI" "secondaryReferenceDOI"
## [31] "aphiaID" "aphiaAuthority"
## [33] "rank" "kingdom"
## [35] "phylum" "class"
## [37] "order" "family"
## [39] "genus" "species"
## [41] "majorgroup" "traitValueSource"
## [43] "traitValueMeasurementNotes" "uploadBy"
## [45] "uploadDate" "maxObsNum"
## [47] "verbatimScientificName" "verbatimTraitName"
## [49] "verbatimTraitValue" "verbatimTraitUnit"
## [51] "verbatimLifeStage" "verbatimNotes"
## [53] "isMerged" "mergedTraitTaxonIDs"
# What traits are available?
# The current dataset has 36 traits distributed to 81 traitNames and assigned to 5 general trait buckets. Traits have a dataset-specific traitName which were assigned with standardized trait units. A single trait may be represented in multiple ways and thus have unique traitIDs and traitNames (e.g., the nitrogen content trait can be "nitrogenTotal" which is the bulk total nitrogen of an individual in mg or "nitrogenPDW" which is the percent of nitrogen relative to the dry weight).
length(unique(directory.trait$traitName))
## [1] 83
length(unique(directory.trait$trait))
## [1] 36
unique(directory.trait$traitBucket)
## [1] "morphological" "composition" "behavioral" "physiological"
## [5] "life history"
unique(directory.trait$dataType)
## [1] "continuous" "categorical" "binary"
# Table of traits
kable(select(directory.trait, traitBucket, trait, traitName, traitUnit, dataType))
| traitBucket | trait | traitName | traitUnit | dataType |
|---|---|---|---|---|
| morphological | body length | bodyLengthMax | mm | continuous |
| morphological | dry weight | dryWeight | mg | continuous |
| morphological | wet weight | wetWeight | mg | continuous |
| morphological | ash free dry weight | afdwPDW | percent | continuous |
| morphological | carbon weight | carbonWeight | mg | continuous |
| morphological | water content | waterPWW | percent | continuous |
| morphological | water content | dryWeightPWW | percent | continuous |
| morphological | myelination | myelination | NA | categorical |
| morphological | bioluminescence | bioluminescence | NA | categorical |
| composition | carbon content | carbonPDW | percent | continuous |
| composition | carbon content | carbonPWW | percent | continuous |
| composition | nitrogen content | nitrogenTotal | mg | continuous |
| composition | nitrogen content | nitrogenPDW | percent | continuous |
| composition | phosphorus content | phosphorusTotal | mg | continuous |
| composition | phosphorus content | phosphorusPDW | percent | continuous |
| composition | elemental ratio | ratioCN | NA | continuous |
| composition | elemental ratio | ratioCP | NA | continuous |
| composition | elemental ratio | ratioNP | NA | continuous |
| composition | ash content | ashTotal | mg | continuous |
| composition | ash content | ashPDW | percent | continuous |
| composition | lipid content | lipidPDW | percent | continuous |
| composition | lipid content | lipidPWW | percent | continuous |
| composition | protein content | proteinPDW | percent | continuous |
| composition | protein content | proteinPWW | percent | continuous |
| composition | carbohydrate content | carbohydratePDW | percent | continuous |
| composition | proximate ratio | ratioProteinLipid | NA | continuous |
| composition | energy content | energyContent | J | continuous |
| composition | energy density | energyDensityDW | J mg DW^-1 | continuous |
| composition | energy density | energyDensityWW | J mg WW^-1 | continuous |
| composition | energy density | energyDensityVolume | J m^-3 | continuous |
| behavioral | clearance rate | clearanceRate_15C | ml h^-1 | continuous |
| behavioral | clearance rate | clearanceRate_WSC_15C | ml mg C^-1 h^-1 | continuous |
| behavioral | ingestion rate | ingestionRate_15C | ug C h^-1 | continuous |
| behavioral | ingestion rate | ingestionRate_WSC_15C | ug C mg C^-1 h^-1 | continuous |
| physiological | respiration rate | respirationRate_15C | ul O2 h^-1 | continuous |
| physiological | respiration rate | respirationRate_WSDW_15C | ul O2 mg DW^-1 h^-1 | continuous |
| physiological | respiration rate | respirationRate_WSWW_15C | ul O2 mg WW^-1 h^-1 | continuous |
| physiological | respiration rate | respirationRate_WSC_15C | ul O2 mg C^-1 h^-1 | continuous |
| physiological | excretion rate N | excretionRateN_15C | ug N-NH4+ h^-1 | continuous |
| physiological | excretion rate N | excretionRateN_WSDW_15C | ug N-NH4+ mg DW^-1 h^-1 | continuous |
| physiological | excretion rate P | excretionRateP_15C | ug P-PO43- h^-1 | continuous |
| physiological | excretion rate P | excretionRateP_WSDW_15C | ug P-PO43- mg DW^-1 h^-1 | continuous |
| physiological | growth rate | growthRate_15C | mg C h^-1 | continuous |
| physiological | growth rate | growthRate_WSC_15C | mg C mg C^-1 h^-1 | continuous |
| behavioral | feeding mode | feedingMode | NA | categorical |
| behavioral | reproduction mode | reproductionMode | NA | categorical |
| behavioral | trophic group | trophicGroup | NA | categorical |
| behavioral | vertical distribution | verticalDistribution | NA | categorical |
| behavioral | habitat association | habitatAssociation | NA | categorical |
| behavioral | diel vertical migration | dielVerticalMigration | NA | categorical |
| life history | hibernation | hibernationStage | NA | categorical |
| life history | clutch size | clutchSize | NA | continuous |
| life history | fecundity | fecundity | NA | continuous |
| life history | egg size | eggWeight | ug | continuous |
| life history | egg size | eggDiameter | um | continuous |
| life history | development duration | developmentDuration | day | continuous |
| morphological | myelination | MYE.absent | NA | binary |
| morphological | myelination | MYE.present | NA | binary |
| morphological | bioluminescence | BIO.absent | NA | binary |
| morphological | bioluminescence | BIO.present | NA | binary |
| behavioral | feeding mode | FM.passive | NA | binary |
| behavioral | feeding mode | FM.active | NA | binary |
| behavioral | feeding mode | FM.cruise | NA | binary |
| behavioral | feeding mode | FM.current | NA | binary |
| behavioral | feeding mode | FM.ambush | NA | binary |
| behavioral | feeding mode | FM.active.ambush | NA | binary |
| behavioral | feeding mode | FM.passive.ambush | NA | binary |
| behavioral | feeding mode | FM.particle.feeder | NA | binary |
| behavioral | feeding mode | FM.parasite | NA | binary |
| behavioral | feeding mode | FM.commensal | NA | binary |
| behavioral | reproduction mode | RM.broadcasting | NA | binary |
| behavioral | reproduction mode | RM.brooding | NA | binary |
| behavioral | reproduction mode | RM.asexual | NA | binary |
| behavioral | trophic group | TG.carnivore | NA | binary |
| behavioral | trophic group | TG.omnivore | NA | binary |
| behavioral | trophic group | TG.herbivore | NA | binary |
| behavioral | trophic group | TG.detritivore | NA | binary |
| behavioral | diel vertical migration | DVM.absent | NA | binary |
| behavioral | diel vertical migration | DVM.present | NA | binary |
| behavioral | diel vertical migration | DVM.reverse | NA | binary |
| behavioral | diel vertical migration | DVM.weak | NA | binary |
| behavioral | diel vertical migration | DVM.strong | NA | binary |
| life history | hibernation | HIB.present | NA | binary |
# The trait dataset contains information in 3 data types. "Continuous" are numerical traits while categorical traits are coded as characters. Categorical traits may also be represented as binary records which can be filtered using dataType == "binary". For binary records, 1 represents presence of a trait and 0 represents absence.
unique(directory.trait$dataType)
## [1] "continuous" "categorical" "binary"
# For now, we can review the coverage of traits without the binary traits.
zooplankton.traits %>%
filter(dataType == "binary") %>%
distinct(traitName)
## # A tibble: 27 × 1
## traitName
## <chr>
## 1 TG.omnivore
## 2 TG.herbivore
## 3 TG.carnivore
## 4 TG.detritivore
## 5 FM.passive
## 6 FM.active.ambush
## 7 FM.ambush
## 8 FM.active
## 9 FM.current
## 10 FM.passive.ambush
## # … with 17 more rows
This lists the traits and counts the number of species-level records for a specific traitName.
# This is the number of taxon observation per trait. The number of taxa with trait records vary with bodyLengthMax having the most trait data. The coverage of rate trait data is limited because this dataset includes only trait observations with temperature information.
traits.summary <- zooplankton.traits %>%
filter(dataType != "binary") %>%
# Add information about the trait bucket
left_join(distinct(directory.trait, traitBucket, traitID),
by = "traitID") %>%
# Assign trait buckets as factors
mutate(traitBucket = factor(traitBucket,
levels = c("morphological","composition",
"physiological","behavioral",
"life history"))) %>%
# Isolate relevant columns
distinct(traitBucket, traitID, traitName, taxonID) %>%
# Add up the number of taxon records per trait
group_by(traitBucket, traitName, traitID) %>%
summarise(Nspecies = n()) %>%
ungroup() %>%
# Arrange the traits in decreasing order for the figure
arrange(-Nspecies) %>%
mutate(traitName = fct_reorder(traitName, Nspecies))
## `summarise()` has grouped output by 'traitBucket', 'traitName'. You can
## override using the `.groups` argument.
# Stacked barplot with count at end
trait.bar <- ggplot(traits.summary, aes(x = traitName, y = Nspecies)) +
geom_bar(stat = "identity") +
# Set axis break
scale_y_continuous(breaks = c(0, 250, 500, 750, 1000, 3000),
limits = c(0, 3180)) +
scale_y_break(c(1250,2980), expand = FALSE) +
# Add number of species at the end of the bar
geom_text(aes(label = after_stat(y), group = traitName),
stat = "summary", fun = sum, hjust = -0.1,
size = 3) +
coord_flip() +
xlab("Trait") + ylab("# Species") +
# facets proportioned by number of traits
facet_grid(traitBucket ~., scales = "free_y", space = "free", switch = "y") +
theme(strip.placement = "outside")
trait.bar
## Species-TraitName by major group We can visualize the taxonomic
coverage by major group.
# Assign colors to major groups
majorgroup.colors <- data.frame(
majorgroup = c("Calanoid","Non-calanoid","Amphipod","Decapod","Euphausiid",
"Mysid","Ostracod","Polychaete","Pteropod",
"Chaetognath","Appendicularian","Thaliacean","Cladoceran",
"Hydromedusae","Siphonophore","Scyphomedusae","Ctenophore"),
color = c("blue","yellowgreen","cornflowerblue","lightseagreen","forestgreen",
"darkseagreen2","cyan3","tan1","olivedrab",
"darkorchid","violetred","hotpink3","saddlebrown",
"tomato1","goldenrod3","yellow3","red4"))
# This is similar to the chunk above but with different groupings
traits.summary <- zooplankton.traits %>%
filter(dataType != "binary") %>%
# Add information about the trait bucket
left_join(distinct(directory.trait, traitBucket, traitID),
by = "traitID") %>%
# Assign trait buckets as factors
mutate(traitBucket = factor(traitBucket,
levels = c("morphological","composition",
"physiological","behavioral",
"life history"))) %>%
# Isolate relevant columns
distinct(traitBucket, traitID, traitName, majorgroup, taxonID) %>%
# Add up the number of taxon records per trait
group_by(traitBucket, traitName, majorgroup, traitID) %>%
# Set the levels of the major groups
mutate(majorgroup = factor(majorgroup,
levels = majorgroup.colors$majorgroup)) %>%
summarise(Nspecies = n()) %>%
ungroup() %>%
# Arrange the traits in decreasing order for the figure
group_by(traitBucket, traitName) %>%
mutate(Nspecies.total = sum(Nspecies)) %>%
ungroup() %>%
arrange(-Nspecies.total) %>%
mutate(traitName = fct_reorder(traitName, Nspecies.total))
## `summarise()` has grouped output by 'traitBucket', 'traitName', 'majorgroup'.
## You can override using the `.groups` argument.
# Stacked barplot with count at end
trait.bar <- ggplot(traits.summary, aes(x = traitName, y = Nspecies,
fill = majorgroup)) +
geom_bar(stat = "identity") +
# Set axis break
scale_y_continuous(breaks = c(0, 250, 500, 750, 1000, 3000),
limits = c(0, 3180)) +
scale_y_break(c(1250,2980), expand = FALSE) +
# Add number of species at the end of the bar
geom_text(aes(label = after_stat(y), group = traitName),
stat = "summary", fun = sum, hjust = -0.1,
size = 3) +
scale_fill_manual(values = majorgroup.colors$color,
limits = majorgroup.colors$majorgroup,
name = "Major group") +
coord_flip() +
xlab("Trait") + ylab("# Species") +
# facets proportioned by number of traits
facet_grid(traitBucket ~., scales = "free_y", space = "free", switch = "y") +
theme(strip.placement = "outside")
trait.bar
Here, we inspect the number of traits per species and continue to check how evenly distributed the dataset is. This figure shows that most species have only a few traits assigned to them, but there are a few species with a lot of trait information known.
traits.summary.2 <- zooplankton.traits %>%
filter(dataType != "binary") %>%
distinct(traitName, scientificName, taxonID) %>%
group_by(taxonID, scientificName) %>%
summarise(Ntraits = n()) %>%
arrange(-Ntraits)
## `summarise()` has grouped output by 'taxonID'. You can override using the
## `.groups` argument.
head(traits.summary.2)
## # A tibble: 6 × 3
## # Groups: taxonID [6]
## taxonID scientificName Ntraits
## <dbl> <chr> <int>
## 1 630 Calanus finmarchicus 47
## 2 3023 Euphausia pacifica 45
## 3 1478 Neocalanus plumchrus 43
## 4 1475 Neocalanus cristatus 42
## 5 3079 Aglantha digitale 42
## 6 4894 Aurelia aurita 41
traits.summary.2 <- traits.summary.2 %>%
group_by(Ntraits) %>%
summarise(Ntraits.total = sum(Ntraits)) %>%
ungroup() %>%
arrange(-Ntraits)
ggplot(traits.summary.2, aes(x = Ntraits, y = Ntraits.total)) +
geom_point(size = 2, colour = "black") +
geom_segment( aes(x=Ntraits, xend=Ntraits, y=0, yend=Ntraits.total)) +
xlab("Number of traits per species") +
ylab("Number of records")
# Numerical traits
# As an example, we can inspect the range of body lengths in each major group.
body.length <- zooplankton.traits %>%
filter(traitName == "bodyLengthMax") %>%
mutate(traitValue = as.numeric(traitValue)) %>%
# calculate the mean to sort major groups
group_by(majorgroup) %>%
mutate(mean.size = mean(traitValue)) %>%
ungroup() %>%
mutate(majorgroup = fct_reorder(majorgroup, mean.size))
# In this dataset, body length varies across four orders of magnitude. Many major groups have overlaping and similar body sizes. This suggests that if we approach research questions from, say, a size-spectrum perspective, taxonomic differences between major groups might not be that important.
ggplot(body.length, aes(x = majorgroup, y = traitValue)) +
geom_violin() +
# visualize the mean
stat_summary(fun.data = "mean_cl_boot", geom = "pointrange",
colour = "red") +
scale_y_continuous(labels = scaleFUN, trans = "log10") +
theme(axis.text.x = element_text(angle = 45, hjust=1))
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Removed 1 rows containing missing values (`geom_segment()`).
# Although, there would be taxonomic nuances in how size is measured. A measurement of total body length for individuals with an elongated body plan is typical but for more spherical and/or colonial organisms, length could be based on a specific body axis or individual in colonial organisms. Below lists the various body length axes in the dataset.
(cleanStrings(unique(body.length$sizeType)))
## [1] "anterior nectophore length; bell height; bell width; body height; body width; nectophore height; nectophore length; nectophore width; not given; posterior nectophore length; prosome length; shell length; total length; trunk length"
# One important trait is nitrogen content. For example, this can be used a currency in biogeochemical models. When considering composition traits, it is important to distinguish total or percent-weight trait values and choose which would be most appropirate for a particular analysis.
nitrogen.content <- zooplankton.traits %>%
# inspect total nitrogen content and the fraction of dry weight which is nitrogen
filter(traitName %in% c("nitrogenTotal", "nitrogenPDW")) %>%
mutate(traitValue = as.numeric(traitValue))
ggplot(nitrogen.content, aes(x = majorgroup, y = traitValue)) +
geom_violin() +
# visualize the mean
stat_summary(fun.data = "mean_cl_boot", geom = "pointrange",
colour = "red") +
scale_y_continuous(labels = scaleFUN, trans = "log10") +
theme(axis.text.x = element_text(angle = 45, hjust=1)) +
facet_wrap(~traitName, ncol = 1, scales = "free_y")
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Warning: Removed 2 rows containing missing values (`geom_segment()`).
## Removed 2 rows containing missing values (`geom_segment()`).
In this dataset, categorical traits, particularly behavioral traits, are some of the most common records. Although numerical traits are useful (and much needed) in quantitative models, categorical traits are often used to characterize zooplankton assemblages or even as predictor variables. Here we inspect some of the commonly utilized categorical traits in zooplankton studies: reproduction mode, feeding mode, trophic group, and vertical distribution.
Note that in all these traits, an organism can be associated with multiple trait values (e.g., salps have a sexual broadcasting reproduction strategy and an asexual reproduction strategy). This should be considered when performing analysis using categorical traits. For now we will visualize all instances of a categorical trait.
# Visualize the number of species associated with a particular categorical trait
vertical.distribution <- zooplankton.traits %>%
filter(traitName == "verticalDistribution") %>%
group_by(traitValue) %>%
summarise(Nspecies = n())
ggplot(vertical.distribution, aes(x = traitValue, y = Nspecies)) +
geom_bar(stat = "identity") +
coord_flip() +
xlab("Vertical distribution") + ylab("# Species") +
scale_x_discrete(limits = rev(c("epipelagic","epimesopelagic","epibathypelagic",
"mesopelagic","mesobathypelagic","bathypelagic")))
# The trait information could be accessed by calling the categorical version of the trait (e.g., "reproductionMode"), but for some instances, using the binary version would be easier in analysis or visuals.
repro.mode <- zooplankton.traits %>%
filter(traitName %in% c("RM.broadcasting","RM.brooding","RM.asexual"),
traitValue == 1) %>%
# rename trait levels by removing "RM." prefix
mutate(traitName = str_replace(traitName, "RM.",""))
# Bar plot of total species with a particular trait value, organized by major group.
repro.mode <- repro.mode %>%
group_by(traitName, majorgroup) %>%
summarise(Nspecies = n())
## `summarise()` has grouped output by 'traitName'. You can override using the
## `.groups` argument.
ggplot(repro.mode, aes(x = traitName, y = Nspecies,
fill = majorgroup)) +
geom_bar(stat = "identity") +
scale_fill_manual(values = majorgroup.colors$color,
limits = majorgroup.colors$majorgroup,
name = "Major group") +
coord_flip() +
xlab("Reproduction mode") + ylab("# Species")
# Proportion of reproduction modes per major group
ggplot(repro.mode, aes(x = majorgroup, y = Nspecies,
fill = traitName)) +
geom_bar(position = "fill", stat = "identity") +
coord_flip() +
scale_y_continuous(labels = scales::percent) +
scale_x_discrete(limits = rev) +
xlab("Major group") + ylab("% Species")
# Relationship between trophic group and feeding mode
# There are different ways in which the feeding mode trait is coded.
unique(filter(zooplankton.traits, grepl("FM.",traitName))$traitName)
## [1] "FM.passive" "FM.active.ambush" "FM.ambush"
## [4] "FM.active" "FM.current" "FM.passive.ambush"
## [7] "FM.cruise" "FM.particle.feeder" "FM.parasite"
## [10] "FM.commensal"
# For now, we can consider, ambush, cruise, and current feeding and check if there might be a relationship between feeding mode and the general trophic group a species is often clustered into.
feeding.trophic <- zooplankton.traits %>%
filter(traitName %in% c("FM.ambush","FM.cruise","FM.current","FM.particle.feeder"),
traitValue == 1) %>%
# rename the trait variable
rename(feeding.mode = traitName) %>%
# rename trait levels by removing "RM." prefix
mutate(feeding.mode = str_replace(feeding.mode, "FM.","")) %>%
# merge with the column on trophic groups
left_join(select(filter(zooplankton.traits,
traitName %in% c("TG.omnivore","TG.herbivore",
"TG.carnivore","TG.detritivore")),
taxonID, trophic.group = traitName),
by = "taxonID") %>%
mutate(trophic.group = str_replace(trophic.group, "TG.","")) %>%
relocate(feeding.mode, trophic.group) %>%
# only consider instances when information for both traits are available
filter(!is.na(feeding.mode) & !is.na(trophic.group)) %>%
group_by(feeding.mode, trophic.group) %>%
summarise(Nspecies = n())
## `summarise()` has grouped output by 'feeding.mode'. You can override using the
## `.groups` argument.
ggplot(feeding.trophic, aes(x = trophic.group, y = Nspecies,
fill = feeding.mode)) +
geom_bar(position = "fill", stat = "identity") +
scale_y_continuous(labels = scales::percent) +
xlab("Major group") + ylab("% Species")
Size is commonly referred to as a “master trait” because many biological attributes scales with size. There are multiple measures of size such as length or biomass. For zooplankton, this is is an important concern when considering both crustaceans and gelatinous or soft-bodied species. Gelatinous zooplankton have an “inflated” size because they have a higher water content. Note that for the figures below, we are simply using the geom_smooth() function of ggplot to estimate and visualize a linear model. If we to formally derive a regression, care must be taken in choosing the regression model and data curation.
unique(zooplankton.traits$traitName)
## [1] "carbonWeight" "dryWeight"
## [3] "afdwPDW" "wetWeight"
## [5] "dryWeightPWW" "waterPWW"
## [7] "carbonPDW" "nitrogenPDW"
## [9] "ratioNP" "carbonPWW"
## [11] "ratioCN" "phosphorusPDW"
## [13] "ratioCP" "nitrogenTotal"
## [15] "phosphorusTotal" "proteinPDW"
## [17] "lipidPDW" "ashPDW"
## [19] "proteinPWW" "lipidPWW"
## [21] "ratioProteinLipid" "carbohydratePDW"
## [23] "ashTotal" "energyDensityWW"
## [25] "energyContent" "energyDensityVolume"
## [27] "energyDensityDW" "clearanceRate_15C"
## [29] "clearanceRate_WSC_15C" "ingestionRate_15C"
## [31] "ingestionRate_WSC_15C" "respirationRate_15C"
## [33] "respirationRate_WSC_15C" "growthRate_15C"
## [35] "growthRate_WSC_15C" "clutchSize"
## [37] "fecundity" "eggWeight"
## [39] "eggDiameter" "developmentDuration"
## [41] "respirationRate_WSDW_15C" "respirationRate_WSWW_15C"
## [43] "excretionRateN_15C" "excretionRateN_WSDW_15C"
## [45] "excretionRateP_15C" "excretionRateP_WSDW_15C"
## [47] "bodyLengthMax" "feedingMode"
## [49] "reproductionMode" "trophicGroup"
## [51] "verticalDistribution" "dielVerticalMigration"
## [53] "hibernationStage" "myelination"
## [55] "bioluminescence" "habitatAssociation"
## [57] "TG.omnivore" "TG.herbivore"
## [59] "TG.carnivore" "TG.detritivore"
## [61] "FM.passive" "FM.active.ambush"
## [63] "FM.ambush" "FM.active"
## [65] "FM.current" "FM.passive.ambush"
## [67] "FM.cruise" "FM.particle.feeder"
## [69] "FM.parasite" "FM.commensal"
## [71] "RM.broadcasting" "RM.brooding"
## [73] "RM.asexual" "MYE.absent"
## [75] "MYE.present" "DVM.absent"
## [77] "DVM.present" "DVM.weak"
## [79] "DVM.reverse" "DVM.strong"
## [81] "HIB.present" "BIO.absent"
## [83] "BIO.present"
# Because body length is a commonly reported size measurement, we can inspect how the measures of weight scale with length.
size.traits <- zooplankton.traits %>%
# filter species with weight measurements
filter(traitName %in% c("carbonWeight", "dryWeight", "wetWeight")) %>%
# Append body length as a column for comparison
left_join(dplyr::select(filter(zooplankton.traits,
traitName == "bodyLengthMax"),
taxonID, bodyLength = traitValue),
by = "taxonID") %>%
# Filter out instances when there are no length information.
filter(!is.na(bodyLength)) %>%
# Make sure to convert continuous traits to numeric
mutate(traitValue = as.numeric(traitValue),
bodyLength = as.numeric(bodyLength))
# It is clear that weight scales with length, but there seems to be quite some variability in this relationship.
size.plot <- ggplot(data = size.traits,
aes(x = bodyLength, y = traitValue, color = traitName)) +
geom_point() +
# Add labels for inspecting points
geom_point(aes(text = paste0(scientificName," (",majorgroup,")"))) +
# Add a linear model
geom_smooth(method = "lm") +
xlab("Length (mm)") + ylab("Weight (mg)") +
# Log transform axis scales
scale_x_continuous(labels = scaleFUN, trans = "log10") +
scale_y_continuous(labels = scaleFUN, trans = "log10")
## Warning in geom_point(aes(text = paste0(scientificName, " (", majorgroup, :
## Ignoring unknown aesthetics: text
ggplotly(size.plot)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation:
## x_plotlyDomain, y_plotlyDomain
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
# To further inspect this scaling, we can separate hard-bodied crustacean zooplankton from gelatinous and soft-bodied zooplankton.
size.traits <- size.traits %>%
# Assign a body plan grouping variable
mutate(bodyPlan = if_else(phylum == "Arthropoda","hard-body","soft-body"))
# Focus on carbon weight. Here we see that body plan mediates the slope of the weight - length relationship.
size.plot.2 <- ggplot(data = filter(size.traits, traitName == "carbonWeight"),
aes(x = bodyLength, y = traitValue, color = bodyPlan)) +
# Add labels for inspecting points
geom_point(aes(text = paste0(scientificName," (",majorgroup,")"))) +
# Add a linear model
geom_smooth(method = "lm") +
xlab("Length (mm)") + ylab("Weight (mg)") +
# Log transform axis scales
scale_x_continuous(labels = scaleFUN, trans = "log10") +
scale_y_continuous(labels = scaleFUN, trans = "log10")
## Warning in geom_point(aes(text = paste0(scientificName, " (", majorgroup, :
## Ignoring unknown aesthetics: text
ggplotly(size.plot.2)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation:
## x_plotlyDomain, y_plotlyDomain
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
# Allometric scaling of physiological rates with size
# A common application of size as a master trait is using it as a predictor for other traits, especially physiological rates. Here, we inspect respiration rate, which is the most common rate trait in the dataset. Carbon weight will be used as the measure for size.
rate.size <- zooplankton.traits %>%
# select respiration rate traits
filter(traitName %in% c("respirationRate_15C", "respirationRate_WSC_15C")) %>%
# Append carbon weight as a column for comparison
left_join(dplyr::select(filter(zooplankton.traits,
traitName == "carbonWeight"),
taxonID, carbonWeight = traitValue),
by = "taxonID") %>%
# Filter out instances when there are no carbon weight information.
filter(!is.na(carbonWeight)) %>%
# Make sure to convert continuous traits to numeric
mutate(traitValue = as.numeric(traitValue),
carbonWeight = as.numeric(carbonWeight))
# Here we see carbon weight scales with size. Weight-specific rate values often have flatter slope, and sometimes negative slope.
rate.size.plot <- ggplot(data = rate.size,
aes(x = carbonWeight, y = traitValue, color = traitName)) +
# Add labels for inspecting points
geom_point(aes(text = paste0(scientificName," (",majorgroup,")"))) +
# Add a linear model
geom_smooth(method = "lm") +
# xlab("Length (mm)") + ylab("Weight (mg)") +
# Log transform axis scales
scale_x_continuous(labels = scaleFUN, trans = "log10") +
scale_y_continuous(labels = scaleFUN, trans = "log10")
## Warning in geom_point(aes(text = paste0(scientificName, " (", majorgroup, :
## Ignoring unknown aesthetics: text
ggplotly(rate.size.plot)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation:
## x_plotlyDomain, y_plotlyDomain
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
# Now, can inspect the size scaling of the excretion rate of N as ammonia. The data source for excretion rates used dry weight as the associated size measure, so we reference excretion rate relative to dry weight.
rate.size.2 <- zooplankton.traits %>%
# select respiration rate traits
filter(traitName %in% c("excretionRateN_15C", "excretionRateN_WSDW_15C")) %>%
# Append dry weight as a column for comparison
left_join(dplyr::select(filter(zooplankton.traits,
traitName == "dryWeight"),
taxonID, dryWeight = traitValue),
by = "taxonID") %>%
# Filter out instances when there are no dry weight information.
filter(!is.na(dryWeight)) %>%
# Make sure to convert continuous traits to numeric
mutate(traitValue = as.numeric(traitValue),
dryWeight = as.numeric(dryWeight))
rate.size.plot.2 <- ggplot(data = rate.size.2,
aes(x = dryWeight, y = traitValue, color = traitName)) +
# Add labels for inspecting points
geom_point(aes(text = paste0(scientificName," (",majorgroup,")"))) +
# Add a linear model
geom_smooth(method = "lm") +
# xlab("Length (mm)") + ylab("Weight (mg)") +
# Log transform axis scales
scale_x_continuous(labels = scaleFUN, trans = "log10") +
scale_y_continuous(labels = scaleFUN, trans = "log10")
## Warning in geom_point(aes(text = paste0(scientificName, " (", majorgroup, :
## Ignoring unknown aesthetics: text
ggplotly(rate.size.plot.2)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation:
## x_plotlyDomain, y_plotlyDomain
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
We can further look into how a trait might affect the distribution of other traits or the allometric scaling relationship of traits.
rate.size <- zooplankton.traits %>%
# select respiration rate traits
filter(traitName %in% c("respirationRate_15C", "respirationRate_WSC_15C")) %>%
# Append carbon weight as a column for comparison
left_join(dplyr::select(filter(zooplankton.traits,
traitName == "carbonWeight"),
taxonID, carbonWeight = traitValue),
by = "taxonID") %>%
# Filter out instances when there are no carbon weight information.
filter(!is.na(carbonWeight)) %>%
# Make sure to convert continuous traits to numeric
mutate(traitValue = as.numeric(traitValue),
carbonWeight = as.numeric(carbonWeight))
# One broad way to classify zooplankton is whether they have a passive or active feeding mode. Passive feeding include ambush feeding behavior while active feeding includes raptorial cruise feeding and current or filter-feeding.
fm.activity <- zooplankton.traits %>%
filter(traitName %in% c("FM.passive","FM.active"),
traitValue == 1) %>%
# transform for merging with the size data
select(taxonID, feeding.activity = traitName) %>%
# Check if there are multiple records for the same taxon. This may not be a concern because some species employ multiple behavioral strategies. Note that this may result in duplicated rows when doing a left join.
group_by(taxonID) %>%
mutate(nvals = n()) %>%
arrange(-nvals)
# We know that respiration rate scales with size, but does feeding activity mediate this relationship? To explore this question, we include a feeding activity column to the rate-size figure. Here we see that active feeders have a slightly higher respiration rate compared to passive feeders. But is this a significant difference? Could the taxonomic coverage of the species included in this analysis limit what we can infer? Would it be more informative if the individual-level trait data was used?
rate.size.activity <- rate.size %>%
left_join(fm.activity, by = "taxonID") %>%
relocate(feeding.activity) %>%
filter(!is.na(feeding.activity))
rate.activity.plot <- ggplot(data = filter(rate.size.activity,
traitName == "respirationRate_15C"),
aes(x = carbonWeight, y = traitValue, color = feeding.activity)) +
# Add labels for inspecting points
geom_point(aes(text = paste0(scientificName," (",majorgroup,")"))) +
# Add a linear model
geom_smooth(method = "lm") +
ylab("Respiration rate") +
# Log transform axis scales
scale_x_continuous(labels = scaleFUN, trans = "log10") +
scale_y_continuous(labels = scaleFUN, trans = "log10")
## Warning in geom_point(aes(text = paste0(scientificName, " (", majorgroup, :
## Ignoring unknown aesthetics: text
ggplotly(rate.activity.plot)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation:
## x_plotlyDomain, y_plotlyDomain
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?